if (!file.exists("met_all.gz"))
download.file(
url = "https://raw.githubusercontent.com/USCbiostats/data-science-data/master/02_met/met_all.gz",
destfile = "met_all.gz",
method = "libcurl",
timeout = 60
)
met <- data.table::fread("met_all.gz")Lab4 Homework
1.
Remove temps less than -17C, make sure there are no missing values
met <- met[met$temp > -17][elev == 9999.0, elev := NA][wind.sp == 9999.0, wind.sp := NA][dew.point == 9999.0, dew.point := NA][rh == 9999.0, rh := NA]Date variable
met$date_variable <- as.Date(paste(met$year, met$month, met$day, sep = "-"))
print(met) USAFID WBAN year month day hour min lat lon elev wind.dir
1: 690150 93121 2019 8 1 0 56 34.300 -116.166 696 220
2: 690150 93121 2019 8 1 1 56 34.300 -116.166 696 230
3: 690150 93121 2019 8 1 2 56 34.300 -116.166 696 230
4: 690150 93121 2019 8 1 3 56 34.300 -116.166 696 210
5: 690150 93121 2019 8 1 4 56 34.300 -116.166 696 120
---
2317200: 726813 94195 2019 8 31 19 56 43.650 -116.633 741 70
2317201: 726813 94195 2019 8 31 20 56 43.650 -116.633 741 NA
2317202: 726813 94195 2019 8 31 21 56 43.650 -116.633 741 10
2317203: 726813 94195 2019 8 31 22 56 43.642 -116.636 741 10
2317204: 726813 94195 2019 8 31 23 56 43.642 -116.636 741 40
wind.dir.qc wind.type.code wind.sp wind.sp.qc ceiling.ht ceiling.ht.qc
1: 5 N 5.7 5 22000 5
2: 5 N 8.2 5 22000 5
3: 5 N 6.7 5 22000 5
4: 5 N 5.1 5 22000 5
5: 5 N 2.1 5 22000 5
---
2317200: 5 N 2.1 5 22000 5
2317201: 9 C 0.0 5 22000 5
2317202: 5 N 2.6 5 22000 5
2317203: 1 N 2.1 1 22000 1
2317204: 1 N 2.1 1 22000 1
ceiling.ht.method sky.cond vis.dist vis.dist.qc vis.var vis.var.qc
1: 9 N 16093 5 N 5
2: 9 N 16093 5 N 5
3: 9 N 16093 5 N 5
4: 9 N 16093 5 N 5
5: 9 N 16093 5 N 5
---
2317200: 9 N 16093 5 N 5
2317201: 9 N 16093 5 N 5
2317202: 9 N 14484 5 N 5
2317203: 9 N 16093 1 9 9
2317204: 9 N 16093 1 9 9
temp temp.qc dew.point dew.point.qc atm.press atm.press.qc rh
1: 37.2 5 10.6 5 1009.9 5 19.88127
2: 35.6 5 10.6 5 1010.3 5 21.76098
3: 34.4 5 7.2 5 1010.6 5 18.48212
4: 33.3 5 5.0 5 1011.6 5 16.88862
5: 32.8 5 5.0 5 1012.7 5 17.38410
---
2317200: 32.2 5 12.2 5 1012.8 5 29.40686
2317201: 33.3 5 12.2 5 1011.6 5 27.60422
2317202: 35.0 5 9.4 5 1010.8 5 20.76325
2317203: 34.4 1 9.4 1 1010.1 1 21.48631
2317204: 34.4 1 9.4 1 1009.6 1 21.48631
date_variable
1: 2019-08-01
2: 2019-08-01
3: 2019-08-01
4: 2019-08-01
5: 2019-08-01
---
2317200: 2019-08-31
2317201: 2019-08-31
2317202: 2019-08-31
2317203: 2019-08-31
2317204: 2019-08-31
Keep the first week of the month
met$weeknumber <- data.table::week(as.Date(paste(met$year, met$month, met$day, sep = "-")))
print(met) USAFID WBAN year month day hour min lat lon elev wind.dir
1: 690150 93121 2019 8 1 0 56 34.300 -116.166 696 220
2: 690150 93121 2019 8 1 1 56 34.300 -116.166 696 230
3: 690150 93121 2019 8 1 2 56 34.300 -116.166 696 230
4: 690150 93121 2019 8 1 3 56 34.300 -116.166 696 210
5: 690150 93121 2019 8 1 4 56 34.300 -116.166 696 120
---
2317200: 726813 94195 2019 8 31 19 56 43.650 -116.633 741 70
2317201: 726813 94195 2019 8 31 20 56 43.650 -116.633 741 NA
2317202: 726813 94195 2019 8 31 21 56 43.650 -116.633 741 10
2317203: 726813 94195 2019 8 31 22 56 43.642 -116.636 741 10
2317204: 726813 94195 2019 8 31 23 56 43.642 -116.636 741 40
wind.dir.qc wind.type.code wind.sp wind.sp.qc ceiling.ht ceiling.ht.qc
1: 5 N 5.7 5 22000 5
2: 5 N 8.2 5 22000 5
3: 5 N 6.7 5 22000 5
4: 5 N 5.1 5 22000 5
5: 5 N 2.1 5 22000 5
---
2317200: 5 N 2.1 5 22000 5
2317201: 9 C 0.0 5 22000 5
2317202: 5 N 2.6 5 22000 5
2317203: 1 N 2.1 1 22000 1
2317204: 1 N 2.1 1 22000 1
ceiling.ht.method sky.cond vis.dist vis.dist.qc vis.var vis.var.qc
1: 9 N 16093 5 N 5
2: 9 N 16093 5 N 5
3: 9 N 16093 5 N 5
4: 9 N 16093 5 N 5
5: 9 N 16093 5 N 5
---
2317200: 9 N 16093 5 N 5
2317201: 9 N 16093 5 N 5
2317202: 9 N 14484 5 N 5
2317203: 9 N 16093 1 9 9
2317204: 9 N 16093 1 9 9
temp temp.qc dew.point dew.point.qc atm.press atm.press.qc rh
1: 37.2 5 10.6 5 1009.9 5 19.88127
2: 35.6 5 10.6 5 1010.3 5 21.76098
3: 34.4 5 7.2 5 1010.6 5 18.48212
4: 33.3 5 5.0 5 1011.6 5 16.88862
5: 32.8 5 5.0 5 1012.7 5 17.38410
---
2317200: 32.2 5 12.2 5 1012.8 5 29.40686
2317201: 33.3 5 12.2 5 1011.6 5 27.60422
2317202: 35.0 5 9.4 5 1010.8 5 20.76325
2317203: 34.4 1 9.4 1 1010.1 1 21.48631
2317204: 34.4 1 9.4 1 1009.6 1 21.48631
date_variable weeknumber
1: 2019-08-01 31
2: 2019-08-01 31
3: 2019-08-01 31
4: 2019-08-01 31
5: 2019-08-01 31
---
2317200: 2019-08-31 35
2317201: 2019-08-31 35
2317202: 2019-08-31 35
2317203: 2019-08-31 35
2317204: 2019-08-31 35
Calculate averages
met_avg <- met[,.(
temp = mean(temp,na.rm=TRUE),
rh = mean(rh,na.rm=TRUE),
wind.sp = mean(wind.sp,na.rm=TRUE),
vis.dist = mean(vis.dist,na.rm=TRUE),
lat = mean(lat),
lon = mean(lon),
elev = mean(elev,na.rm=TRUE),
dew.point = mean(dew.point,na.rm=TRUE)
), by=c("USAFID", "day")]
met_avg USAFID day temp rh wind.sp vis.dist lat lon
1: 690150 1 32.59583 20.65546 2.895833 15958.92 34.29967 -116.1657
2: 690150 2 33.54167 13.18436 4.154167 16093.00 34.30000 -116.1660
3: 690150 3 34.30833 14.20129 4.433333 16025.96 34.30000 -116.1660
4: 690150 4 34.72083 11.70307 4.245833 16093.00 34.30000 -116.1660
5: 690150 5 34.77391 13.20827 3.660870 15953.09 34.30000 -116.1660
---
48714: 726813 27 20.05833 44.96461 1.579167 15891.88 43.65000 -116.6330
48715: 726813 28 21.18333 46.79024 1.175000 15623.67 43.65000 -116.6330
48716: 726813 29 24.32083 49.07651 1.733333 16025.96 43.65000 -116.6330
48717: 726813 30 24.54583 50.43012 3.508333 16025.96 43.64967 -116.6331
48718: 726813 31 24.27917 51.08570 1.208333 16025.96 43.64933 -116.6332
elev dew.point
1: 690.0833 6.3000000
2: 696.0000 1.2166667
3: 696.0000 2.4958333
4: 696.0000 0.5541667
5: 696.0000 1.4956522
---
48714: 741.0000 5.9375000
48715: 741.0000 6.8041667
48716: 741.0000 10.8083333
48717: 741.0000 12.3083333
48718: 741.0000 11.6875000
Create region variable
met_avg$Regionlat <- cut(met_avg$lat, breaks=c(24.55525, 39.71 , 48.941), labels=c('N' , 'S'))
met_avg$Regionlon <- cut(met_avg$lon, breaks=c(-124.29, -98.00 , -68.313), labels=c('E' , 'W'))
met_avg$Region<- paste(met_avg$Regionlat, met_avg$Regionlon, sep = "")
print(met_avg) USAFID day temp rh wind.sp vis.dist lat lon
1: 690150 1 32.59583 20.65546 2.895833 15958.92 34.29967 -116.1657
2: 690150 2 33.54167 13.18436 4.154167 16093.00 34.30000 -116.1660
3: 690150 3 34.30833 14.20129 4.433333 16025.96 34.30000 -116.1660
4: 690150 4 34.72083 11.70307 4.245833 16093.00 34.30000 -116.1660
5: 690150 5 34.77391 13.20827 3.660870 15953.09 34.30000 -116.1660
---
48714: 726813 27 20.05833 44.96461 1.579167 15891.88 43.65000 -116.6330
48715: 726813 28 21.18333 46.79024 1.175000 15623.67 43.65000 -116.6330
48716: 726813 29 24.32083 49.07651 1.733333 16025.96 43.65000 -116.6330
48717: 726813 30 24.54583 50.43012 3.508333 16025.96 43.64967 -116.6331
48718: 726813 31 24.27917 51.08570 1.208333 16025.96 43.64933 -116.6332
elev dew.point Regionlat Regionlon Region
1: 690.0833 6.3000000 N E NE
2: 696.0000 1.2166667 N E NE
3: 696.0000 2.4958333 N E NE
4: 696.0000 0.5541667 N E NE
5: 696.0000 1.4956522 N E NE
---
48714: 741.0000 5.9375000 S E SE
48715: 741.0000 6.8041667 S E SE
48716: 741.0000 10.8083333 S E SE
48717: 741.0000 12.3083333 S E SE
48718: 741.0000 11.6875000 S E SE
Categorical variable for elevation
met_avg[, elev_cat := ifelse(elev > 252, "high", "low")]
met_avg[, vis_cat := cut(
x = vis.dist,
breaks = c(0, 1000, 6000, 10000, Inf),
labels = c("fog", "mist", "haze", "clear"),
right = FALSE
)]
print(met_avg) USAFID day temp rh wind.sp vis.dist lat lon
1: 690150 1 32.59583 20.65546 2.895833 15958.92 34.29967 -116.1657
2: 690150 2 33.54167 13.18436 4.154167 16093.00 34.30000 -116.1660
3: 690150 3 34.30833 14.20129 4.433333 16025.96 34.30000 -116.1660
4: 690150 4 34.72083 11.70307 4.245833 16093.00 34.30000 -116.1660
5: 690150 5 34.77391 13.20827 3.660870 15953.09 34.30000 -116.1660
---
48714: 726813 27 20.05833 44.96461 1.579167 15891.88 43.65000 -116.6330
48715: 726813 28 21.18333 46.79024 1.175000 15623.67 43.65000 -116.6330
48716: 726813 29 24.32083 49.07651 1.733333 16025.96 43.65000 -116.6330
48717: 726813 30 24.54583 50.43012 3.508333 16025.96 43.64967 -116.6331
48718: 726813 31 24.27917 51.08570 1.208333 16025.96 43.64933 -116.6332
elev dew.point Regionlat Regionlon Region elev_cat vis_cat
1: 690.0833 6.3000000 N E NE high clear
2: 696.0000 1.2166667 N E NE high clear
3: 696.0000 2.4958333 N E NE high clear
4: 696.0000 0.5541667 N E NE high clear
5: 696.0000 1.4956522 N E NE high clear
---
48714: 741.0000 5.9375000 S E SE high clear
48715: 741.0000 6.8041667 S E SE high clear
48716: 741.0000 10.8083333 S E SE high clear
48717: 741.0000 12.3083333 S E SE high clear
48718: 741.0000 11.6875000 S E SE high clear
3.
Geom_violin to examine wind speed and dew point by region
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(magrittr)
library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0 ✔ readr 2.1.4
✔ ggplot2 3.4.3 ✔ stringr 1.5.0
✔ lubridate 1.9.2 ✔ tibble 3.2.1
✔ purrr 1.0.2 ✔ tidyr 1.3.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::extract() masks magrittr::extract()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
✖ purrr::set_names() masks magrittr::set_names()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(data.table)
Attaching package: 'data.table'
The following objects are masked from 'package:lubridate':
hour, isoweek, mday, minute, month, quarter, second, wday, week,
yday, year
The following object is masked from 'package:purrr':
transpose
The following objects are masked from 'package:dplyr':
between, first, last
met_avg[!is.na(dew.point)] %>%
ggplot() +
geom_violin(mapping = aes(x = 1, y = dew.point), fill = "green") met_avg[!is.na(dew.point)][!is.na(wind.sp)] %>%
ggplot(aes(x = Region)) +
geom_violin(aes(y = wind.sp, fill = "Wind Speed"), width = 0.7) +
geom_violin(aes(y = dew.point, fill = "Dew Point"), width = 0.7) +
labs(title = "Wind Speed and Dew Point by Region", y = "Values", fill = NULL) +
theme_minimal() +
theme(axis.text.x = element_blank()) +
scale_fill_manual(values = c("lightblue", "lightgreen")) 4.
Geom_jitter and stat_smooth to examine wind speed and dew point by region
met_avg[!is.na(dew.point)][!is.na(wind.sp)] %>%
ggplot(aes(x = dew.point, y = wind.sp, color = Region)) +
geom_jitter(width = 0.2, height = 0.2, alpha = 0.5) +
stat_smooth(method = "lm", se = FALSE) +
labs(title = "Association between Dew Point and Wind Speed by Region",
x = "Dew Point", y = "Wind Speed") +
scale_color_brewer(palette = "Set1") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
5.
Barcharts of weather stations by elevation category colored by region
met_avg[!is.na(dew.point)][!is.na(wind.sp)] %>%
ggplot(aes(x = elev_cat, fill = Region)) +
geom_bar(position = "dodge") +
scale_fill_brewer(palette = "Set1") +
labs(title = "Weather Stations by Elevation Category", x = "Elevation Category", y = "Count") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))6.
Stat Summary to examine mean dew point and wind speed by region with standard deviation error bars
met_avg[!is.na(dew.point)][!is.na(wind.sp)] %>%
ggplot() +
stat_summary(
mapping = aes(x = Region, y = dew.point),
fun.data = "mean_sdl",
fun.args = list(mult = 1),
geom = "errorbar",
position = position_dodge(width = 0.7),
width = 0.2,
color = "blue" # Color for dew point error bars
) +
stat_summary(
mapping = aes(x = Region, y = wind.sp),
fun.data = "mean_sdl",
fun.args = list(mult = 1),
geom = "errorbar",
position = position_dodge(width = 0.4),
width = 0.2,
color = "green" # Color for wind speed error bars
) +
labs(
title = "Mean Dew Point and Wind Speed by Region",
x = "Region",
y = "Value"
) +
theme_minimal()Dew point is higher in the Northwest
7.
Make a map showing the spatial trend in relative humidity in the US
library(leaflet)
met_avg3 <- subset(met_avg,select = c("USAFID", "rh","lat", "lon"))
met_avg3 <- na.omit(met_avg3)
rh.pal <- colorNumeric(c('darkgreen','goldenrod','brown'), domain=met_avg3$rh)
rh.palfunction (x)
{
if (length(x) == 0 || all(is.na(x))) {
return(pf(x))
}
if (is.null(rng))
rng <- range(x, na.rm = TRUE)
rescaled <- scales::rescale(x, from = rng)
if (any(rescaled < 0 | rescaled > 1, na.rm = TRUE))
warning("Some values were outside the color scale and will be treated as NA")
if (reverse) {
rescaled <- 1 - rescaled
}
pf(rescaled)
}
<bytecode: 0x11c522530>
<environment: 0x11c520f50>
attr(,"colorType")
[1] "numeric"
attr(,"colorArgs")
attr(,"colorArgs")$na.color
[1] "#808080"
rhmap <- leaflet(met_avg3) %>%
# The looks of the Map
addProviderTiles('CartoDB.Positron') %>%
# Some circles
addCircles(
lat = ~lat, lng=~lon,
label = ~paste0(round(rh,2), ' C'), color = ~ rh.pal(rh),
opacity = 1, fillOpacity = 1, radius = 500
) %>%
addLegend('bottomleft', pal = rh.pal, values=met_avg3$rh,
title='Relative Humidity, C', opacity=1)
print(rhmap)
rhmapIt is hotter on the coasts.
Use a ggplot extension: gganimate
library(ggplot2)
library(gganimate)
ggplot(met_avg, aes(factor(elev_cat), dew.point)) +
geom_boxplot() +
transition_states(
elev_cat,
transition_length = 2,
state_length = 1
) +
enter_fade() +
exit_shrink() +
ease_aes('sine-in-out')Warning: Removed 20 rows containing non-finite values (`stat_boxplot()`).